
// Work from the replication-package root; the data paths used below are relative to it.
cd "../replication-package"

/*
   This do-file cleans the GSS gender-role attitude variables,
   1972 onward.

   Each state-decade cell is assigned the state's mean over that decade.

   If a state has no data for an entire decade, the regional decade mean
   computed from the public region-level data is used instead.
*/


//... restricted state-level attitudes ..//
//    Source: restricted file provided by the GSS, extracted/accessed around December 20th, 2019.
//    Contents: weighted state-year levels of the attitude variables.
use "data/raw/gss_indicators_all_raw.dta", clear
generate year_n = year
rename state_abbrev state_a

// Build a complete state x year grid (49 years: 1972-2020) so that every
// state has a row for every year; this keeps the later merges simple.
tempfile gss_allyears
preserve
	keep state_a
	bysort state_a: drop if _n > 1
	expand 49
	bysort state_a: generate year_n = 1972 + (_n - 1)
	sort state_a year_n
	save `gss_allyears'
restore

// Attach the grid; state-years absent from the raw file enter as using-only rows.
merge 1:1 state_a year_n using `gss_allyears', generate(merge_years)
tabulate merge_years
drop merge_years

// Replace the original year with the gap-free copy, keeping its column position.
order year_n, after(year)
drop year
rename year_n year
sort state_a year
drop if state_a == .
tabulate year // each of the 49 years should show one row per state (51 expected)

** state-level measure: mean of each attitude over all available years
local attitudes abany mwork_whome presch_suffer kids_suffer
tempfile gss_allstate

preserve
	// one row per state holding the all-year means
	collapse (mean) abany_7218 mwork_whome_7218 presch_suffer_7218 kids_suffer_7218, by(state_a)
	drop if state_a == .
	foreach a of local attitudes {
		rename `a'_7218 `a'_ch
		label variable `a'_ch "`a' all-year mean (Charles et al.)"
	}
	save `gss_allstate'
restore

// bring the state-level means back onto the state-year panel
merge m:1 state_a using `gss_allstate', generate(merge_state)
tabulate merge_state
drop merge_state

** decade means of each attitude, by state
recode year (1972/1979 = 1 "1970s") (1980/1989 = 2 "1980s") (1990/1999 = 3 "1990s") (2000/2009 = 4 "2000s") (2010/2020 = 5 "2010s"), gen(decade)

foreach att in abany mwork_whome presch_suffer kids_suffer {
	// keep the raw state-year value under an _og suffix
	rename `att'_7218 `att'_og
	// one mean per state-decade cell, assigned to every year in that cell;
	// rows whose decade is not one of the five codes stay missing
	egen `att'_dc = mean(`att'_og) if inlist(decade, 1, 2, 3, 4, 5), by(state_a decade)
	label variable `att'_dc "Decade mean of `att', by state"
}

sort state_a year
order state_a year decade *_dc

** assign the nine-category region from the state abbreviation
generate region = .
decode state_a, generate(state_abbrev_var)

// 1: New England
local div1 CT ME MA NH RI VT
// 2: Mid-Atlantic
local div2 NJ NY PA
// 3: Midwest - East North Central
local div3 IL IN MI OH WI
// 4: Midwest - West North Central
local div4 IA KS MN ND SD NE MO
// 5: South Atlantic
local div5 DE FL GA MD NC SC VA DC WV
// 6: South - East South Central
local div6 AL KY MS TN
// 7: South - West South Central
local div7 AR LA OK TX
// 8: Mountain West
local div8 AZ CO ID MT NV NM UT WY
// 9: West Pacific
local div9 AK CA HI OR WA

// states whose abbreviation is in none of the lists keep region == .
forvalues d = 1/9 {
	foreach st of local div`d' {
		replace region = `d' if state_abbrev_var == "`st'"
	}
}

label define region_lab ///
	1 "New Eng" 2 "Mid Atlantic" 3 "East North Central" ///
	4 "West North Central" 5 "South Atlantic" 6 "East South Central" ///
	7 "West South Central" 8 "Mountain" 9 "Pacific" , replace
label values region region_lab
tabulate region

// park the state-level panel; it is reloaded once the regional file is built
tempfile gss_bystate
save `gss_bystate'

//.. region-level data, public access (data accessed Oct 23, 2023) ..//
clear
do "data/raw/gss-result/GSS.do"
// NOTE(review): tolower is not a built-in command; presumably a user-written
// command that lower-cases the variable names -- confirm it is installed.
tolower
order year id_ region

// keep only the identifiers, the weight, and the four attitude items
keep year id_ wtssall abany region fefam fechld fepresch
drop if !inrange(year, 1972, 2019)
tabulate year // original note: using ~ 2018 data

* Recode the four public-use attitude items into *_pub variables.
* Extended missing codes (.d .i .n) are folded into system missing (.).
* Every new variable is referred to by its full name below, so the block
* does not depend on variable-name abbreviation (set varabbrev).

* Women should be able to get an abortion for any reason
* (1 -> 0, 2 -> 1, so that 1 means "no")
fre abany 
recode abany (.d = .) (.i = .) (.n = .) (1 = 0) (2 = 1), gen(abany_pub)
label var abany_pub "Women's abortion right for any reason. 1 means no"
tab abany_pub, m 

* Women tend home men work (scale reversed: 1<->4, 2<->3)
fre fefam
recode fefam (.d = .) (.i = .) (.n = .) (1=4) (2=3) (3=2) (4=1), gen(mwork_whome_pub)
label var mwork_whome_pub "Better if men work women home. Higher is more con."
tab mwork_whome_pub, m

* Mother working hurts children (original coding kept; only missing codes are folded)
fre fechld
recode fechld (.d = .) (.i = .) (.n = .), gen(kids_suffer_pub)
label var kids_suffer_pub "Mother working hurst children. Higher is more con."
* was "tab kids_suffer", which only resolved through variable abbreviation
tab kids_suffer_pub, m

* Preschool kids suffer if mother works (scale reversed: 1<->4, 2<->3)
fre fepresch
recode fepresch (.d = .) (.i = .) (.n = .) (1=4) (2=3) (3=2) (4=1), gen(presch_suffer_pub)
label var presch_suffer_pub "Preschl kids suffer if mother works. Higher is more con."
tab presch_suffer_pub, m

*** weighted region-year means of the recoded items
collapse (mean) *_pub [pw=wtssall], by(region year)
generate year_n = year

// Build a complete region x year grid (49 years: 1972-2020) so that
// years without a survey still get a row.
tempfile pub_allyears
preserve
	keep region
	bysort region: drop if _n > 1
	expand 49
	bysort region: generate year_n = 1972 + (_n - 1)
	sort region year_n
	save `pub_allyears'
restore

merge 1:1 region year_n using `pub_allyears', nogenerate

// replace the original year with the gap-free copy
order year_n, after(year)
drop year
rename year_n year
sort region year

// quick look at the yearly distribution across regions
bysort year: summarize *_pub
sort region year

// Linear interpolation over year, within region, to fill the years between
// survey waves (e.g., biannual); the filled series are stored as *_pub_m.
foreach att in abany mwork_whome kids_suffer presch_suffer {
	bysort region (year): ipolate `att'_pub year, generate(`att'_pub_m)
}

// only the interpolated series are carried forward
keep year region *_pub_m

// decade means of the interpolated series, by region
recode year (1972/1979 = 1 "1970s") (1980/1989=2 "1980s") (1990/1999=3 "1990s") (2000/2009=4 "2000s") (2010/2020=5 "2010s"), gen(decade)

foreach series in abany_pub_m mwork_whome_pub_m kids_suffer_pub_m presch_suffer_pub_m {
	// one mean per region-decade cell, assigned to every year in that cell;
	// rows whose decade is not one of the five codes stay missing
	egen `series'_dc = mean(`series') if inlist(decade, 1, 2, 3, 4, 5), by(region decade)
	label variable `series'_dc "Decade mean of `series', by region"
}

// park the regional file for the merge with the state panel
tempfile gss_public
save `gss_public'

// Reload the state panel and attach the regional series by region and year.
use `gss_bystate', clear
merge m:1 region year using `gss_public', generate(merge_reg)
tabulate merge_reg
drop merge_*

// retain identifiers, all-year means, interpolated regional series, and every decade mean
keep state_a year decade *_ch *_pub_m *_dc
sort state_a year

** which state-decade cells have no state-level decade mean?
describe, fullnames
local dcvars abany_dc mwork_whome_dc kids_suffer_dc presch_suffer_dc

foreach dv of local dcvars {
	// 1 when the state decade mean is missing, 0 otherwise
	generate `dv'_miss = (`dv' == .)
	tabulate state_a decade if `dv'_miss == 1
}

// overall flag: 1 when any of the four state decade means is missing
generate gss_decade_miss = 0
foreach dv of local dcvars {
	unique state_a if `dv'_miss == 1 & inrange(decade, 2, 5) // 15 states
	replace gss_decade_miss = 1 if `dv' == .
}

** final series: state decade mean, or the regional decade mean where it is missing
foreach att in abany mwork_whome kids_suffer presch_suffer {
	generate `att'_n = cond(`att'_dc == ., `att'_pub_m_dc, `att'_dc)
}

** overwrite each state's 2019 regional value with its 2018 value
// Within state, ordered by year, the row just before 2019 is 2018 in the
// balanced panel; if it is not, 2019 is set to missing.
foreach series in abany_pub_m mwork_whome_pub_m kids_suffer_pub_m presch_suffer_pub_m {
	bysort state_a (year): replace `series' = cond(year[_n-1] == 2018, `series'[_n-1], .) if year == 2019
}

summarize *_n // 2,499 = 51*49

// final variable set: ids, missing flag, all-year means, filled decade series, regional yearly series
keep state_a year decade gss_decade_miss *_ch *_n *_pub_m

foreach att in abany mwork_whome kids_suffer presch_suffer {
	label variable `att'_n "`att', decade X state, imputed using reg when nec."
	label variable `att'_pub_m "`att', year X region, public data, interpolated missing years"
}
label variable decade "Decade"

// restrict to 1982-2020 and report how many states needed regional fill
drop if !inrange(year, 1982, 2020)
unique state_a if gss_decade_miss==1

save "data/state/04_motherwork_value_GSS.dta", replace


